knitr::opts_chunk$set(echo = TRUE, include=TRUE, warning = FALSE, message=FALSE)
This in a adaptation of the workflow for clustering and pseudotime analysis of flow cytometry data presented in Melsen et al. (2020). The workflow consists of four main sections: data preprocessing (transformation and normalization), clustering, dimensionality reduction and trajectory / pseudotime analysis.
REFERENCE:
Melsen JE, van Ostaijen-Ten Dam MM, Lankester AC, Schilham MW, van den Akker EB. A compreensive Workflow for Applying Single-Cell Clustering and Pseudotime Analysis to Flow Cytometry Data. J Immunol. 2020 Aug 1;205(3):864-871. doi: 10.4049/jimmunol.1901530. Epub 2020 Jun 26. PMID: 32591399.
# Clear the environment
rm(list = ls())
########################
#### LOAD LIBRARIES ####
########################
# for R markdown
library("knitr")
library("rmarkdown")
library("pander")
# for visualization
library(ggplot2) # for creating plots
library(scales) # control the appearance of axis and legend labels.
library(RColorBrewer) # manage colors with R. It offers several color palettes
library(ggrepel) # geoms for ggplot2 to repel overlapping text labels
library(ggcyto) # visualization for cytometry data
library(ggforce) # extra funtionalities for ggplots
library(grDevices) # support for Colours and Fonts.
library(pheatmap) # heatmaps
library(ggridges) # geoms for ggplot2 for ridgeplots
library(gridExtra) # arrange multiple plots on a page
# Flow cytometry (single cell) data analysis
library(flowCore) # flowSets and Logicle transformation
library(flowWorkspace) # gating
library(flowVS) # Arcsinh transformation
library(flowStats) # fdaNorm normalization
library(FlowSOM) # flowSOM clustering and metaclustering
library(openCyto) # gating
library(CATALYST) # clustering, dimensionality reduction, ploting
library(SingleCellExperiment) # sce objects
library(scater) # pre-processing, quality control, normalisation and visualisation of single-cell RNA-seq data
#library(uwot)
# other general
library(Biobase) # Base functions for Bioconductor
library(reshape2) # function for conversion between wide-format and long-format data
library(dplyr) # functions for data manipulation
library(tidyr) # functions for "tidying up" data
# Diffusion maps
#BiocManager::install("destiny")
library(destiny)
# Pseudotime / trajectory analysis
#BiocManager::install("slingshot")
library(slingshot)
# phenograph algorithm
#devtools::install_github("JinmiaoChenLab/cytofkit2", dependencies=TRUE)
library(cytofkit2)
#########################################
########## USEFULL FUNCTIONS ############
#########################################
# dowsampling a flowset
Downsampling_FlowSet <- function(x, samplesize , replace=TRUE, prob=NULL, stratified = FALSE){
if(stratified) {
total_size <- sum(fsApply(x, nrow)[,1])
flowCore::fsApply(x, function(ff){
i <- sample(nrow(ff), size = nrow(ff)*samplesize/total_size, replace=replace, prob)
ff[i,]
})
} else {
if(missing(samplesize))
samplesize <- min(flowCore::fsApply(x,nrow))
flowCore::fsApply(x, function(ff){
i <- sample(nrow(ff), size = samplesize, replace=replace, prob)
ff[i,]
})
}
}
# add labels to a ggplot
add_label <- function(object, dimred, text_by = "label", text_size = 5, text_colour = "gray12", box_padding=5) {
text_out <- retrieveCellInfo(object, text_by, search = "colData")
text_out$val <- as.factor(text_out$val)
red_dim <- as.matrix(reducedDim(object, dimred))
df_to_plot <- data.frame(red_dim[, seq_len(2), drop = FALSE])
colnames(df_to_plot)[seq_len(2)] <- c("X", "Y")
by_text_x <- vapply(split(df_to_plot$X, text_out$val), median, FUN.VALUE = 0)
by_text_y <- vapply(split(df_to_plot$Y, text_out$val), median, FUN.VALUE = 0)
ggrepel::geom_text_repel(data = data.frame(x = by_text_x, y = by_text_y, label = names(by_text_x)),
mapping = aes(x = x, y = y, label = label),
inherit.aes = FALSE, size = text_size, colour = text_colour, force = 1,
box.padding = box_padding, max.overlaps = Inf, min.segment.length = 0, segment.color="gray12",
segment.alpha=0.5)
}
# Scale values in a vector between 0 and 1
scale_values <- function(x){(x-min(x, na.rm = TRUE))/(max(x, na.rm = TRUE)-min(x, na.rm = TRUE))}
# Trim extreme values in a vector
trim_values <- function(x, q) {
x[x < quantile(x, q)] <- quantile(x, q)
x[x > quantile(x, 1-q)] <- quantile(x, 1-q)
return(x)
}
We use the flow cytometry data from T cells of eight healthy bone marrow donors, which was originaly presented in the study by Oetjen et al. (2018). The raw flow cytometry data (no preprocessing) is available for download from the FlowRepository website (accession FR-FCM-ZYQ9).
REFERENCE:
Oetjen KA, Lindblad KE, Goswami M, Gui G, Dagur PK, Lai C, Dillon LW, McCoy JP, Hourigan CS. Human bone marrow assessment by single-cell RNA sequencing, mass cytometry, and flow cytometry. JCI Insight. 2018 Dec 6;3(23):e124928. doi: 10.1172/jci.insight.124928. PMID: 30518681; PMCID: PMC6328018.
Before proceeding to the analyses in R, initial preprocessing steps are usually performed using conventional gating software (e.g. FlowJo). In Melsen et al. (2020), compensation, cleaning (exclusion of dead cells and doublets) and T cell gating was performed previously to analysis in R.
The preprocessed FCS files are imported into R with the flowCore package. A data frame of the panel of markers is created, and markers are classified as âtypeâ (if used for clustering and dimensionality reduction) or ânoneâ (otherwise).
###############################
##### LOAD FCS FILES ##########
###############################
# path to the directory with the fcs files
fcs.dir<- file.path( "/cloud/project/course_datasets/FR_FCM_ZYQ9")
# read fcs files into a flowSet
dfs_fs <- read.flowSet(path=fcs.dir, pattern="*.fcs", transformation = FALSE, truncate_max_range = FALSE)
###############################
##### PLOT THE PANEL ##########
###############################
# create the panel
channels <- colnames(dfs_fs)
antigen <- pData(parameters(dfs_fs[[1]]))$desc
marker_class <- rep("none",length(antigen))
marker_class[antigen %in% c("CD95","CD8","CD27","CCR7","CD45RA","CD3","CD49b","CD4","CD69","CD103")] <- "type"
panel <- data.frame(channels = channels, antigen= antigen, marker_class = marker_class)
# Plot the panel
my_table <- panel
rownames(my_table) <- NULL
pandoc.table(my_table,
style="multiline",
split.table=150,
split.cells=c(12,8,8), # width of columns
use.hyphening=TRUE,
caption="Panel", # the title of the table
justify = c('left',"right", 'right')) # how text is justified in table
| channels | antigen | marker_class |
|---|---|---|
| FSC-A | FSC.A | none |
| FSC-H | FSC.H | none |
| FSC-W | FSC.W | none |
| SSC-A | SSC.A | none |
| SSC-H | SSC.H | none |
| SSC-W | SSC.W | none |
| R660-A | CD95 | type |
| R780-A | CD8 | type |
| B515-A | CD27 | type |
| B710-A | CXCR4 | none |
| V450-A | CCR7 | type |
| V545-A | LIVE.DEAD | none |
| V605-A | CD4 | type |
| V655-A | CD45RA | type |
| V800-A | CD3 | type |
| G560-A | CD49b | type |
| G610-A | CD14.19 | none |
| G660-A | CD69 | type |
| G780-A | CD103 | type |
| Time | TIME | none |
# change colnames to antigens
colnames(dfs_fs)[!is.na(antigen)] <- antigen[!is.na(antigen)]
panel$channels <- colnames(dfs_fs)
According to the authors, CXCR4 was removed from the analysis because no reliable CXCR4 staining was observed, and CD14 was only used for the initial gating of T cells. We classified both markers as ânoneâ, since we are not going to use them.
In this step, we transform the data from the original to a scale in which positive and negative peaks are better identified. Because each fluorochrome has a distinct staining pattern, each requires an individual transformation, and manual inspection and adjustment of the transformations is essential.
We compare three methods: hyperbolic arcsine (arcsinh) transformation (as implemented in FlowVS) using selected cofactors; arcsinh transformation with cofactors estimated by flowVS; and Logicle transformation as implemented in flowCore.
Data before the transformation:
densityplot(~., channels = panel$channels[panel$marker_class=="type"], dfs_fs, main="Data before any transformation")
FlowVS (arcsinh) and FlowCore (logicle) allow transformations with estimated parameters.
################################################################
####### ARCSINH TRANSFORMATION WITH ESTIMATED COFACTORS ########
################################################################
# Automated arcsinh parameter optimized transformation
# automatedcofactors <- estParamFlowVS(dfs_fs, panel$channels[panel$marker_class=="type"]) #this may take a while.
# It is advised to save your cofactor data as an csv or excel file, this is for reproducibility purposes.
# save(automatedcofactors, file = "/cloud/project/course_datasets/FR_FCM_ZYQ9/automatedcofactors.RData")
load("/cloud/project/course_datasets/FR_FCM_ZYQ9/automatedcofactors.RData")
dfs_fs_t_auto <- transFlowVS(dfs_fs, channels=panel$channels[panel$marker_class=="type"], cofactor=automatedcofactors)
## Transforming flowSet by asinh function with supplied cofactors.
# density plots
densityplot(~., channels = panel$channels[panel$marker_class=="type"], dfs_fs_t_auto, main="Data after arcsinh transformation with estimated factors (estParamFlowVs function from FlowVS)")
#######################################
####### LOGICLE TRANSFORMATION ########
#######################################
# create an empty list
fcs_list <- list()
# iterate over flowFrames
for(i in 1:length(dfs_fs)){
# estimate parameters for transformation
algcl <- estimateLogicle(dfs_fs[[i]],
channels = panel$channels[panel$marker_class=="type"])
# transform and add to list
fcs_list[[i]] <- transform(dfs_fs[[i]], algcl)
}
# convert list of flowFrames to a a flowSet
dfs_fs_t_logicle <- as(fcs_list, "flowSet")
# make sure the sample names and the metadata are transfered to te new flowSet object
sampleNames(dfs_fs_t_logicle) <- sampleNames(dfs_fs)
pData(dfs_fs_t_logicle) <- pData(dfs_fs)
# density plot
densityplot(~., channels = panel$channels[panel$marker_class=="type"], dfs_fs_t_logicle, main="Data after Logicle transformation (FlowCore)")
Methods with estimated parameters work satisfactorily, except for markers that have low-intensity expression or are expressed at low frequency (in this case, CD103, CD49b and CCR7).
The cofactor of the arcsinh transformation equals the size of the linear region on the positive or negative side of the zero. Manually adjusting the cofactors for each fluorochrome allows a better tuning of this linear region.
Manually adjusted cofactors can be deduced by adjusting the linear size of the axis on a biexponential scale, as plotted in conventional gating software.
If optimal cofactors differ (slightly) between samples, the highest cofactor should be chosen, as long as no false-negative peaks are observed.
##############################################
####### MANUAL ARCSINH TRANSFORMATION ########
##############################################
## Each parameter of interest needs to be arcsinh transformed with an individual cofactor.
# The cofactor can be deduced from the size of the linear region around zero on a biexponential scale, as plotted in a histogram (in conventional gating software).
# Choose manual transformation or automated transformation (we prefer manual)
# Define parameters and cofactors for transformations:
# manual hyperbolic arcsinh transformation (from Melsen et al. 2020)
manualcofactors <- c(CD95=524,CD8=262,CD27=263,CCR7=1787,CD45RA=524,CD3=678,CD49b=398,CD4=915,CD69=830,CD103=504)
dfs_fs_t_manual <- transFlowVS(dfs_fs, channels=names(manualcofactors), cofactor=manualcofactors)
## Transforming flowSet by asinh function with supplied cofactors.
# density plot
densityplot(~., channels = panel$channels[panel$marker_class=="type"], dfs_fs_t_manual, main="Data after arcsinh transformation with manually adjusted cofactors (flowVS)")
After a visual inspection, the manual arcsinh transformation with adjusted cofactors resulted in the most optimal distributions, and was selected for downstream analyses.
Technical inter-sample variation in the data (âbatch effectsâ) requires correction before the analysis (ânormalizationâ).
For some of the markers (CD49b, CD4, CD27, CCR7, CD8 and CD3) we observe subtle differences in signal intensities between the donors.
Here, we demonstrate the use of the fdaNorm method, as implemented in the flowStats package, to correct such differences. Compared to other methods, fdaNorm has the advantage of automatically detect the number of peaks from the data.
##############################
######## NORMALIZATION #######
##############################
## To correct for technical inter-sample variation we apply normalization by fdaNorm (which automatically detects the number of peaks)
# This method is implemented in the flowStats package, a collection of algorithms for the statistical analysis of flow cytometry data.
# The warpSet function can be used to normalize data according to a set of landmarks, which essentially are the peaks or high-density areas in the density estimates
# Select the markers which require normalization (based on the densityplots you generated above). Be aware that you don't remove biological variation!
dfs_fs_t_manual_normfda <- warpSet(dfs_fs_t_manual, stains=c('CD8','CD27','CCR7','CD3','CD49b','CD4'))
##
Estimating landmarks for channel CD8 ...
Estimating landmarks for channel CD27 ...
Estimating landmarks for channel CCR7 ...
Estimating landmarks for channel CD3 ...
Estimating landmarks for channel CD49b ...
Estimating landmarks for channel CD4 ...
## Registering curves for parameter CD8 ...
## Registering curves for parameter CD27 ...
## Registering curves for parameter CCR7 ...
## Registering curves for parameter CD3 ...
## Registering curves for parameter CD49b ...
## Registering curves for parameter CD4 ...
# Data after normalization
densityplot(~CD8+CD27+CCR7+CD3+CD49b+CD4, dfs_fs_t_manual_normfda, main="Data after normalization with fdaNorm (flowStats)")
In the workflow presented by Melsen et al. (2020), the normalized data was exported as FCS files and imported into Cytosplore software. Using Cytosplore, they performed a HSNE analysis (HSNE dimensionality reduction and GMS clustering) to select CD4+ T cells. As these methods are not implemented in R, we instead performed automated gating of the CD4+ T cells using the openCyto package and a CD8 / CD4 quadrant gate.
#######################################
######### Gate CD4+ T CELLS ###########
#######################################
# convert the flowSet to a gatingSet
gs <- GatingSet(dfs_fs_t_manual_normfda)
# apply automated quadrant gate with openCyto
gs_add_gating_method(gs,
parent = "root",
alias = "*",
dims = "CD4,CD8",
gating_method = "gate_mindensity",
pop = "-/++/-")
# Verify the gating hierarchy
# plot(gs)
# OPTION 1: gate the "CD4+CD8-" cells (2D gate)
autoplot(gs[[1]], gate=gs_pop_get_children(gs,"root")[3:6], bins = 256)
cs <- gs_pop_get_data(gs, "CD4+CD8-")
dfs_fs_t_manual_normfda_CD4 <- cytoset_to_flowSet(cs)
# OPTION 2: gate the "CD4+" cells (1D gate)
# autoplot(gs[[1]], gate=gs_pop_get_children(gs,"root")[1:2],y = "SSC.A")
# cs <- gs_pop_get_data(gs, "CD4+")
# dfs_fs_t_manual_normfda_CD4 <- cytoset_to_flowSet(cs)
Optional step to reduce the computational load. Two options: random sampling (random selection of n cells per sample) or density-dependent sampling (random selection of same proportion of cells per sample).
For this demonstration, and in order to reduce the computational load, we perform density-dependent downsampling to a total of 40â000 cells,
# Check numer of cells per sample
my_pData <- pData(dfs_fs_t_manual_normfda_CD4)
my_pData$sample_id <- gsub(".fcs","",my_pData$name)
my_pData$sample_id <- gsub("CD3_","",my_pData$sample_id)
my_pData$ncells <- as.vector(fsApply(dfs_fs_t_manual_normfda_CD4, nrow))
barplot(height=my_pData$ncells,
names=my_pData$sample_id,
ylab = "Nr of cells", main = "Nr of cells per sample before downsampling" )
######################################
######## OPTIONAL: DOWNSAMPLE ########
######################################
## OPTION 1 :
# downsample to 5'000 cells per sample (random downsample)
# set.seed(1234)
# dfs_fs_t_manual_normfda_small <- Downsampling_FlowSet(dfs_fs_t_manual_normfda_CD4, samplesize = 5000, replace = FALSE, stratified = FALSE)
## OPTION 2 : proportional to sample size (density-dependent downsampling)
# downsample to 40'000 cells in total
set.seed(1234)
dfs_fs_t_manual_normfda_small <- Downsampling_FlowSet(dfs_fs_t_manual_normfda_CD4, samplesize = 40004, replace = FALSE, stratified=TRUE)
my_pData$ncells <- as.vector(fsApply(dfs_fs_t_manual_normfda_small, nrow))
barplot(height=my_pData$ncells,
names=my_pData$sample_id,
ylab = "Nr of cells" ,main = "Nr of cells per sample after downsampling")
The aim of this step, is to identify groups of phenotypically similar cells. Clustering can be achieved by applying general clustering methods (such as hierarchical clustering or K-means) or methods specifically developed for cytometry data, such as FlowSOM (as implemented in the CATALYST package) and Phenograph (implemented in the cytofkit2 package (https://github.com/JinmiaoChenLab/cytofkit2).
We demonstrate the use of FlowSom and Phenograph for clustering. Both methods allow tuning of the number of clusters, which were set to 14, as in Melsen et al. (2020).
After clustering, phenotypes can be assigned to clusters by inspection of heatmaps and ridgeplots showing the expression of markers in each cluster (annotation).
###################################
######## CREATE SCE OBJECT ########
###################################
# Create a SingleCellExperiment (sce) object for CATALYST
# If 'md' is unspecified, the flowFrame/Set identifier(s) will be used as sample IDs with no additional metadata factors.
sce <- prepData(x = dfs_fs_t_manual_normfda_small,
panel = panel,
panel_cols = list(channel = "channels", antigen = "antigen", class = "marker_class"),
md = my_pData,
md_cols = list(file = "name",id = "sample_id", factors = "ncells"),
transform = FALSE,
FACS=TRUE,
features = panel$antigen[panel$marker_class!="none"])
# change the assay name to "exprs"
assayNames(sce) <- "exprs"
####################################
######## FLowSOM CLUSTERING ########
####################################
# Generate clusters by FlowSOM
sce <- cluster(sce,
features = "type",
maxK = 14,
verbose = FALSE,
seed = 100)
# Get metaclustering per cell
sce$clusters_flowsom <- cluster_ids(sce, k = "meta14")
##################################################
# Heatmap with expression of markers by cluster #
##################################################
# OPTION 1: no scaling of values
plotExprHeatmap(sce,features = type_markers(sce),
k = "meta14", by = "cluster",
scale = "never", q=0.01, col_clust = FALSE, row_anno = FALSE, bars = FALSE) +
ggtitle("Heatmap with expression of markers per FlowSom cluster")
# OPTION 2: first aggregate values by cluster (median), then scale from 0 to 1
# plotExprHeatmap(sce,features = type_markers(sce),
# k = "meta14", by = "cluster",
# scale = "first", q=0.01, col_clust = FALSE, row_anno = FALSE, bars = FALSE) +
# ggtitle("FlowSom clusters")
# OPTION 3: first scale values from 0 to 1, then aggregate by cluster (median)
# plotExprHeatmap(sce,features = type_markers(sce),
# k = "meta14", by = "cluster",
# scale = "last", q=0.01, col_clust = FALSE, row_anno = FALSE, bars = FALSE) +
# ggtitle("FlowSom clusters")
The heatmap shows median expression of markers per cluster.
#####################################################
# Ridgeplots with expression of markers by cluster #
#####################################################
plotClusterExprs(sce,
features = "type",
k = "meta14") +
ggtitle("Ridgeplots with expression of markers per FlowSOM clusters")
We can also plot the frequency of clusters per sample:
###############################################
# check frequency of each cluster per sample #
###############################################
plotAbundances(sce, k="meta14", by="sample_id",group_by = "sample_id") +
ggtitle("Abundance of flowSOM clusters")
#######################################
######## Phenograpg CLUSTERING ########
#######################################
## Generate clusters by Phenograph (based on Louvain clustering)
# the higher the K nearest neighbours, the lower the number of clusters
# extract the expression matrix from the sce object
df <- t(assay(sce,"exprs"))[,type_markers(sce)]
# run phenograph
phenograph <- Rphenograph(df, k=50)
# add phenograp clusters to sce object
sce$clusters_phenograph <- as.factor(phenograph$membership)
# OPTIONAL: save the sce object
#save(sce, file = "/cloud/project/course_datasets/FR_FCM_ZYQ9/sce_after_clustering.RData")
#load("/cloud/project/course_datasets/FR_FCM_ZYQ9/sce_after_clustering.RData")
##################################################
# Heatmap with expression of markers by cluster #
##################################################
# We define a custom function for creating heatmaps with expression values per cluster from a sce object
plot_expression_heatmap <- function(my_sce, features,cell_clustering, scale = "never", q = 0.01, title = "") {
# retrieve the expression
expression <- t(assay(sce,"exprs")[features,])
# scale
# "first": scale & trim then aggregate
# "last": aggregate then scale & trim
# "never": aggregate only
# scale = "first"
if(scale=="first") {
# trim
expression <- apply(expression,2, function(x) trim_values(x, q))
# scale
expression <- apply(expression,2, function(x) scale_values(x))
}
# Calculate the median expression
expr_median <- aggregate(. ~ cell_clustering,
data= data.frame(expression, cell_clustering = my_sce@colData[,cell_clustering]),
FUN=median)
expr_heat <- as.matrix(expr_median[, colnames(expression)])
rownames(expr_heat) <- expr_median$cell_clustering
if(scale=="last") {
# trim
expr_heat <- apply(expr_heat,2, function(x) trim_values(x, q))
# scale
expr_heat <- apply(expr_heat,2, function(x) scale_values(x))
}
# This clustering is based on the markers that were used for the main clustering
d <- dist(expr_median[, colnames(expression)], method = "euclidean")
cluster_rows <- hclust(d, method = "average")
# Colors for the heatmap
color <- colorRampPalette(rev(brewer.pal(n = 9, name = "RdYlBu")))(100)
pheatmap(expr_heat,
color = color,
cluster_cols = FALSE,
cluster_rows = cluster_rows,
labels_col = colnames(expr_heat),
labels_row = rownames(expr_heat),
display_numbers = TRUE, number_color = "black",
fontsize = 8, fontsize_number = 0.001, main = title)
}
# run the function
plot_expression_heatmap(my_sce = sce,features = type_markers(sce), cell_clustering = "clusters_phenograph",scale = "never", q = 0.01, title = "Heatmap with expression of markers per Phenograph cluster")
#####################################################
# Ridgeplots with expression of markers by cluster #
#####################################################
# custom function for creating a ridgeplot with expression values per cluster from a sce object
ridge_plot <- function(my_sce, features, cell_clustering, col_palette=NULL ){
# retrieve the expression
expression <- as.data.frame(t(assay(sce,"exprs")[features,]))
expression$cluster <- my_sce@colData[, cell_clustering]
expression_long <- gather(expression, marker, expression, features, factor_key=TRUE)
p <- ggplot(expression_long, aes(x = expression, y = cluster)) +
geom_density_ridges(aes(fill = cluster)) +
theme_linedraw() +
theme(legend.position = "none") +
facet_wrap(~ marker, nrow = 1, scales="free_x")
if(!is.null(col_palette)) {
p <- p + scale_fill_manual(values = col_palette)
}
return(p)
}
# run the function
ridge_plot(my_sce = sce, features = type_markers(sce),cell_clustering = "clusters_phenograph",col_palette = NULL ) +
ggtitle("Ridgeplots with expression of markers per Phenograph cluster")
###############################################
# check frequency of each cluster per sample #
###############################################
cluster_abundances <- as.data.frame(table(sce$sample_id,sce$clusters_flowsom))
colnames(cluster_abundances) <- c("sample_id","cluster","Freq")
ggplot(cluster_abundances, aes(fill=cluster, y=Freq, x=sample_id)) +
geom_bar(position="fill", stat="identity") +
xlab("") +
ylab("Proportion of cells") +
ggtitle("Abundance of Phenograph clusters") +
theme_classic()
For visualization purposes, we need to reduce the number of dimensions in the high-dimensional data to two, in such a way that the original cellular heterogeneity in the dataset is still comprehensively represented.
A wide range of dedicated dimensionality reduction methods are available in R, but t-stochastic neighbor embedding (tSNE), uniform manifold approximation and projection (UMAP) and diffusion maps are often used. Each of this method has its own limitations and strengths. Both UMAPs and diffusion maps better preserve the global structure of the data, and are more appropriate for pseudotime analysis.
Here, we demonstrate the dimensionality reduction using UMAP (as implemented in the CATALYST package) and diffusion maps (as implemented in the destiny package ( https://bioconductor.org/packages/release/bioc/vignettes/destiny/inst/doc/Diffusion-Maps.html).
We will use the same âtypeâ markers as we did for clustering.
We plot the cells in DR space with clusters and expression of markers superimposed, in order to help assign phenotypes to clusters.
##########################################
######## DIMENSIONALITY REDUCTION ########
##########################################
## Diffusion map
# WITHOUT DOWNSAMPLING (275'856 cells), THIS WILL TAKE APPROXIMATELY 2 HOURS
# Reduce the K, if computational load is too high (or downsample !)
# If downsampled to 40'000 cells, it will take approximately 5 minutes
dm <- DiffusionMap(df,
k=1000,
suppress_dpt = TRUE,
verbose=TRUE)
## finding knns......done. Time: 258.67s
## Calculating transition probabilities......done. Time: 2.78s
##
## performing eigen decomposition......done. Time: 19.53s
# add the diffusion components to the reduced dimensions slot of the sce object
reducedDim(sce, "DiffusionMap") <- as.data.frame(dm)[,c("DC1","DC2")]
## UMAP
# check different n_neighbours (controls how UMAP balances local versus global structure in the data) for your UMAP plot
# check min_dist (controls how tightly UMAP is allowed to pack points together, low values=clumpier embeddings) for your UMAP plot
sce <- runDR(sce,
assay = "exprs",
dr = "UMAP",
features = "type",
cells = NULL,
n_neighbors = 30,
min_dist=0.001)
# OPTIONAL: save the sce object after this step
# save(sce, file = "/cloud/project/course_datasets/FR_FCM_ZYQ9/sce_after_clustering_and_dr.RData")
# load("/cloud/project/course_datasets/FR_FCM_ZYQ9/sce_after_clustering_and_dr.RData")
#####################
# Vizualize results #
#####################
plot1 <- plotDR(sce,
dr = "DiffusionMap",
assay = "exprs",
color_by="clusters_flowsom") +
add_label(sce,dimred = "DiffusionMap",text_by = "clusters_flowsom",box_padding = 1) +
theme_linedraw() +
theme(legend.position = "none") +
xlab("DC 1") +
ylab("DC 2") +
ggtitle("Diffusion Map with flowSOM clusters superimposed")
plot2 <- plotDR(sce,
dr = "DiffusionMap",
assay = "exprs",
color_by="clusters_phenograph") +
add_label(sce,dimred = "DiffusionMap",text_by = "clusters_phenograph",box_padding = 1) +
theme_linedraw() +
theme(legend.position = "none") +
xlab("DC 1") +
ylab("CD 2") +
ggtitle("Diffusion Map with Phenograph clusters superimposed")
plot3 <- plotDR(sce,
dr = "UMAP",
assay = "exprs",
color_by="clusters_flowsom") +
add_label(sce,dimred = "UMAP",text_by = "clusters_flowsom",box_padding = 1) +
theme_linedraw() +
theme(legend.position = "none") +
xlab("UMAP 1") +
ylab("UMAP 2") +
ggtitle("UMAP with flowSOM clusters superimposed")
plot4 <- plotDR(sce,
dr = "UMAP",
assay = "exprs",
color_by="clusters_phenograph") +
add_label(sce,dimred = "UMAP",text_by = "clusters_phenograph",box_padding = 1) +
theme_linedraw() +
theme(legend.position = "none") +
xlab("UMAP 1") +
ylab("UMAP 2") +
ggtitle("UMAP with Phenograph clusters superimposed")
grid.arrange(plot1,plot2,plot3,plot4, ncol=2, nrow=2)
# UMAP
plotDR(sce,dr = "UMAP", color_by = type_markers(sce), ncol = 3) +
theme_linedraw() +
xlab("UMAP 1") +
ylab("UMAP 2") +
ggtitle("UMAP")
# UMAP
plotDR(sce,dr = "DiffusionMap", color_by = type_markers(sce), ncol = 3) +
theme_linedraw() +
xlab("DC 1") +
ylab("Dc 2") +
ggtitle("Diffusion Map")
As in Melsen et al. (2020), we manually merge phenotypically similar clusters to avoid overclustering. After a visual inspection of the DR, heatmaps and ridge plots, we decide to proceed with the Phenograph clusters, because these correspond better to clusters used in Melsen et al. (2020). We identify most of the same phenotypes, with the exception of CD49b+ Naive and CD8dim cells, which seem to be absent in our data. This might be caused by a different gating of the CD4+ T cells, a different clustering or both.
############################################
#### ANNOTATION OF PHENOGRAPH CLUSTERS #####
############################################
# create a df for the annotation
annotation <- data.frame(cluster_old = 1:14,
cluster_new = c("Naive","CD27+ EM","CD69+CD103- EM",
"CD27+ EM","CD27+ EM","CD27+ CM",
"CD27- EM","CD27+ EM","Naive",
"CD27- EM","Naive","Naive",
"CD103+ EM","EMRA"))
pandoc.table(setNames(annotation,c("Phenograph clusters","Merged clusters")) ,
style="multiline",
split.table=150,
split.cells=c(9,12), # width of columns
use.hyphening=TRUE,
caption="Annotation of clusters", # the title of the table
justify = c('left',"right")) # how text is justified in table
| Phenograph clusters | Merged clusters |
|---|---|
| 1 | Naive |
| 2 | CD27+ EM |
| 3 | CD69+CD103- EM |
| 4 | CD27+ EM |
| 5 | CD27+ EM |
| 6 | CD27+ CM |
| 7 | CD27- EM |
| 8 | CD27+ EM |
| 9 | Naive |
| 10 | CD27- EM |
| 11 | Naive |
| 12 | Naive |
| 13 | CD103+ EM |
| 14 | EMRA |
#########################################
#### MERGING OF PHENOGRAPH CLUSTERS #####
#########################################
# create a slot for the merged clusters
sce$merged_clusters <- annotation[as.numeric(sce$clusters_phenograph),"cluster_new"]
# plot expression of markers
plot_expression_heatmap(my_sce = sce, features = type_markers(sce),cell_clustering = "merged_clusters", scale = "never", q = 0.01, title = "Heatmap with expression of markers per phenotype")
# OPTION 1: create a random palette of colors for the phenotypes
# We can change the palette by changing the seed
# set.seed(1235)
# color = grDevices::colors()[grep('gr(a|e)y', grDevices::colors(), invert = T)]
# palette_clusters <- sample(color, length(unique(sce$merged_clusters)))
# names(palette_clusters) <- unique(sce$merged_clusters)
# OPTION 2: manually create a palette of colors for the phenotypes (ideal)
palette_clusters <- c(
"indianred1",
"yellowgreen",
"chartreuse",
"turquoise4",
"royalblue",
"palegreen3",
"orange2"
)
palette_clusters <- setNames(palette_clusters, c(
"CD27+ CM",
"CD27+ EM",
"CD69+CD103- EM",
"CD27- EM",
"Naive",
"CD103+ EM",
"EMRA" ))
# Plot UMAP with phenotypes
plot5 <- plotDR(sce,
dr = "UMAP",
assay = "exprs",
color_by="merged_clusters") +
add_label(sce,dimred = "UMAP",text_by = "merged_clusters",box_padding = 1) +
theme_linedraw() +
theme(legend.position = "none") +
xlab("UMAP 1") +
ylab("UMAP 2") +
ggtitle("UMAP with phenotypes superimposed") +
scale_color_manual(values = palette_clusters)
# Plot Diffusion Map
plot6 <- plotDR(sce,
dr = "DiffusionMap",
assay = "exprs",
color_by="merged_clusters") +
add_label(sce,dimred = "DiffusionMap",text_by = "merged_clusters",box_padding = 1) +
theme_linedraw() +
theme(legend.position = "none") +
xlab("UMAP 1") +
ylab("UMAP 2") +
ggtitle("Diffusion map with phenotypes superimposed") +
scale_color_manual(values = palette_clusters)
grid.arrange(plot5,plot6, nrow=1)
# PLot abundances
# OPTION 1 : stacked barplot
# cluster_abundances <- as.data.frame(table(sce$sample_id,sce$merged_clusters))
# colnames(cluster_abundances) <- c("sample_id","cluster","Freq")
# ggplot(cluster_abundances, aes(fill=cluster, y=Freq, x=sample_id)) +
# geom_bar(position="fill", stat="identity") +
# scale_fill_manual(values = palette_clusters) +
# xlab("") +
# ylab("Proportion of cells") +
# theme_classic() +
# ggtitle("Frequencies of CD4+ T cell phenotypes")
# OPTION 2: box plots
counts <- as.data.frame.matrix(table(sce$sample_id,sce$merged_clusters))
counts_percofsample = counts/rowSums(counts)*100
counts_percofsample$total <- rowSums(counts)
counts_percofsample$sample_id <- row.names(counts_percofsample)
counts_percofsample <- melt(counts_percofsample, id.vars=c('sample_id', 'total'), variable.name='cluster', value.name='frequency')
ggplot(counts_percofsample, aes(x=reorder(cluster, frequency, FUN=median), y=frequency)) +
geom_boxplot(position='dodge2', outlier.shape=NA) +
geom_jitter(aes(colour=sample_id), size=2) +
xlab('cluster') +
theme_classic() +
theme(axis.ticks.x = element_blank(),
text = element_text(size=12),
axis.text.x = element_text(angle = 45, hjust=1)) +
ggtitle("Frequencies of CD4+ T cell phenotypes")
# OPTION 3: barplots
# ggplot(counts_percofsample, aes(x=cluster, y=frequency, fill=cluster)) +
# geom_bar(stat='identity') +
# scale_fill_manual(values = palette_clusters) +
# facet_wrap(~ sample_id) +
# theme(axis.ticks.x = element_blank(),
# axis.text.x=element_text(angle=90, vjust=.9, hjust=1)) +
# ggtitle("Frequencies of CD4+ T cell phenotypes")
We replicate the CD45RA / CCR7 scatter plot presented in supplementary Figure 6 A of Melsen et al. (2020), showing the distribution of phenotypes.
# Other plots
my_data <- as.data.frame(t(assay(sce,"exprs")[c("CD45RA","CCR7"),]))
my_data$cluster <- sce$merged_clusters
ggplot(my_data,aes(x=CD45RA, y=CCR7, color=cluster)) +
geom_point(size=0.1, alpha=0.2) +
geom_density_2d(contour=T, h = c(0.5,0.5)) +
#stat_ellipse(show.legend = TRUE) +
ylim(-0.5,2) +
scale_color_manual(values = palette_clusters, name = "Phenotypes") +
theme_classic()
In trajectory or pseudotime analyses, we assume a continuum of cellular states exists and we aim to reveal cellular progression between these states by inferring trajectories (continuous paths of differentiation).
We demonstrate how to infer cell trajectories and reveal transitional cellular states from flow cytometry using Slingshot https://bioconductor.org/packages/release/bioc/html/slingshot.html. Slingshot uses the clustering to build a minimum spanning tree and determine trajectories. It then fits smooth curves in the DR space that corresponds to identified trajectories. Finally, a pseudotime value is assigned to each cell according to its progression along trajectories, given a starting cluster (naive CD4+ T cells ). Diffusion map is more appropriate for pseudotime analysis because it naturally reorders cells along potential trajectories.
##############################
#### PSEUDOTIME ANALYSIS #####
##############################
# use only the same 8 markers as in the paper
sce.slingshot <- sce[type_markers(sce),]
# run slingshot in a single function
sce.slingshot <- slingshot(sce.slingshot,
clusterLabels = "clusters_phenograph",
reducedDim = "DiffusionMap",
start.clus = "Naive")
# OPTION: save the sce object after pseudotime analysis
# save(sce.slingshot, file = "/cloud/project/course_datasets/FR_FCM_ZYQ9/sce_after_slingshot.RData")
# load("/cloud/project/course_datasets/FR_FCM_ZYQ9/sce_after_slingshot.RData")
###################
# VISUALIZATION #
###################
op <- par(no.readonly = TRUE)
par(mfrow = c(2,1))
# palette for pseudotime
colors <- rev(colorRampPalette(brewer.pal(11,'Spectral')[-6])(100))
plotcol <- colors[cut(sce.slingshot$slingPseudotime_1, breaks=100)]
# # Plot trajectories and pseudotime
plot(reducedDims(sce.slingshot)$DiffusionMap, col =palette_clusters[sce.slingshot$merged_clusters], pch=16, main = "Minimum spaning tree")
lines(SlingshotDataSet(sce.slingshot), lwd=2, col='black', type = "lineage")
legend("topleft", names(palette_clusters), cex = 1,
pch=19,
col=palette_clusters,
horiz=FALSE, title = "Phenotypes")
# # Plot trajectories and pseudotime
plot(reducedDims(sce.slingshot)$DiffusionMap, col = plotcol, pch=16, main = "Trajectories")
lines(SlingshotDataSet(sce.slingshot), lwd=2, col='black')
legend("topleft", as.character(seq(0,1,0.25)), cex = 1,
pch=15,
col=colors[c(1,25,50,75,100)],
horiz=FALSE, title = "Pseudotime")
par(op)
Here, an arbitrary scale from 0 = start to 1 = end is used to represent pseudotime.
Plots with individual trajectories:
op <- par(no.readonly = TRUE)
par(mfrow = c(3,1))
# extract pseudotime
pt <- slingPseudotime(sce.slingshot)
nms <- colnames(pt)
# plot 1st curve
plot(reducedDims(sce.slingshot)$DiffusionMap,
col = colors[cut(pt[,nms[1]], breaks = 100)] ,
pch=16, main = "Lineage 1")
lines(slingCurves(sce.slingshot)[[1]], lwd = 2, col = 'black')
legend("topleft", as.character(seq(0,1,0.25)), cex = 1,
pch=15,
col=colors[c(1,25,50,75,100)],
horiz=FALSE, title = "Pseudotime")
# plot 2nd curve
plot(reducedDims(sce.slingshot)$DiffusionMap,
col = colors[cut(pt[,nms[2]], breaks = 100)] ,
pch=16, main = "Lineage 2")
lines(slingCurves(sce.slingshot)[[2]], lwd = 2, col = 'black')
legend("topleft", as.character(seq(0,1,0.25)), cex = 1,
pch=15,
col=colors[c(1,25,50,75,100)],
horiz=FALSE, title = "Pseudotime")
# plot 3rd curve
plot(reducedDims(sce.slingshot)$DiffusionMap,
col = colors[cut(pt[,nms[3]], breaks = 100)] ,
pch=16, main = "Lineage 3")
lines(slingCurves(sce.slingshot)[[3]], lwd = 2, col = 'black')
legend("topleft", as.character(seq(0,1,0.25)), cex = 1,
pch=15,
col=colors[c(1,25,50,75,100)],
horiz=FALSE, title = "Pseudotime")
par(op)
Notice that pseudotime is recomputed relative to each trajectory (0 = start, 1 = end).
Finally, we plot the position of cells assigned to each cluster as a function of pseudotime:
my_data <- data.frame(cluster = sce.slingshot$merged_clusters,
Pseudotime = slingPseudotime(sce.slingshot))
# add lineage pseudotime
my_data$Pseudotime.Lineage1 <- scale_values(my_data$Pseudotime.Lineage1)
my_data$Pseudotime.Lineage2 <- scale_values(my_data$Pseudotime.Lineage2)
my_data$Pseudotime.Lineage3 <- scale_values(my_data$Pseudotime.Lineage3)
my_data$cluster <- factor(my_data$cluster, levels = rev(c("Naive","CD27+ CM","CD27+ EM", "CD103+ EM", "CD69+CD103- EM", "CD27- EM", "EMRA")))
plot7 <- ggplot(my_data,
aes(x = Pseudotime.Lineage1, y = cluster,
colour = cluster)) +
ggbeeswarm::geom_quasirandom(groupOnX = FALSE) +
theme_classic() +
xlab("Pseudotime") + ylab("") +
ggtitle("Lineage 1") +
scale_color_manual(values = palette_clusters,name = "Cluster")
plot8 <-ggplot(my_data,
aes(x = Pseudotime.Lineage2, y = cluster,
colour = cluster)) +
ggbeeswarm::geom_quasirandom(groupOnX = FALSE) +
theme_classic() +
xlab("Pseudotime") + ylab("") +
ggtitle("Lineage 2") +
scale_color_manual(values = palette_clusters,name = "Cluster")
plot9 <- ggplot(my_data,
aes(x = Pseudotime.Lineage3, y = cluster,
colour = cluster)) +
ggbeeswarm::geom_quasirandom(groupOnX = FALSE) +
theme_classic() +
xlab("Pseudotime") + ylab("") +
ggtitle("Lineage 3") +
scale_color_manual(values = palette_clusters,name = "Cluster")
grid.arrange(plot7, plot8, plot9, ncol=1, nrow=3)
sessionInfo()
## R version 4.3.1 (2023-06-16)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 20.04.6 LTS
##
## Matrix products: default
## BLAS: /usr/lib/x86_64-linux-gnu/atlas/libblas.so.3.10.3
## LAPACK: /usr/lib/x86_64-linux-gnu/atlas/liblapack.so.3.10.3; LAPACK version 3.9.0
##
## locale:
## [1] LC_CTYPE=C.UTF-8 LC_NUMERIC=C LC_TIME=C.UTF-8
## [4] LC_COLLATE=C.UTF-8 LC_MONETARY=C.UTF-8 LC_MESSAGES=C.UTF-8
## [7] LC_PAPER=C.UTF-8 LC_NAME=C LC_ADDRESS=C
## [10] LC_TELEPHONE=C LC_MEASUREMENT=C.UTF-8 LC_IDENTIFICATION=C
##
## time zone: UTC
## tzcode source: system (glibc)
##
## attached base packages:
## [1] splines stats4 stats graphics grDevices utils datasets
## [8] methods base
##
## other attached packages:
## [1] cytofkit2_0.99.80 VGAM_1.1-9
## [3] reticulate_1.32.0 plyr_1.8.9
## [5] slingshot_2.8.0 TrajectoryUtils_1.8.0
## [7] princurve_2.1.6 destiny_3.14.0
## [9] tidyr_1.3.0 dplyr_1.1.3
## [11] reshape2_1.4.4 scater_1.28.0
## [13] scuttle_1.10.2 CATALYST_1.24.0
## [15] SingleCellExperiment_1.22.0 SummarizedExperiment_1.30.2
## [17] Biobase_2.60.0 GenomicRanges_1.52.0
## [19] GenomeInfoDb_1.36.4 IRanges_2.34.1
## [21] S4Vectors_0.38.2 BiocGenerics_0.46.0
## [23] MatrixGenerics_1.12.3 matrixStats_1.0.0
## [25] openCyto_2.12.0 FlowSOM_2.8.0
## [27] igraph_1.5.1 flowVS_1.32.0
## [29] flowStats_4.12.0 flowViz_1.64.0
## [31] lattice_0.21-8 gridExtra_2.3
## [33] ggridges_0.5.4 pheatmap_1.0.12
## [35] ggforce_0.4.1 ggcyto_1.28.1
## [37] flowWorkspace_4.12.2 ncdfFlow_2.46.0
## [39] BH_1.81.0-1 flowCore_2.12.2
## [41] ggrepel_0.9.3 RColorBrewer_1.1-3
## [43] scales_1.2.1 ggplot2_3.4.3
## [45] pander_0.6.5 rmarkdown_2.25
## [47] knitr_1.44
##
## loaded via a namespace (and not attached):
## [1] fs_1.6.3 bitops_1.0-7
## [3] doParallel_1.0.17 Rgraphviz_2.44.0
## [5] tools_4.3.1 backports_1.4.1
## [7] vegan_2.6-4 utf8_1.2.3
## [9] R6_2.5.1 uwot_0.1.16
## [11] mgcv_1.8-42 permute_0.9-7
## [13] GetoptLong_1.0.5 withr_2.5.1
## [15] sp_2.1-0 cli_3.6.1
## [17] Cairo_1.6-1 flowClust_3.38.0
## [19] isoband_0.2.7 sandwich_3.0-2
## [21] labeling_0.4.3 sass_0.4.7
## [23] nnls_1.5 mvtnorm_1.2-3
## [25] robustbase_0.99-0 askpass_1.2.0
## [27] proxy_0.4-27 colorRamps_2.3.1
## [29] rrcov_1.7-4 plotrix_3.8-2
## [31] pdist_1.2.1 TTR_0.24.4
## [33] rstudioapi_0.15.0 generics_0.1.3
## [35] shape_1.4.6 gtools_3.9.4
## [37] car_3.1-2 Matrix_1.6-1.1
## [39] interp_1.1-4 RProtoBufLib_2.12.1
## [41] ggbeeswarm_0.7.2 fansi_1.0.4
## [43] abind_1.4-5 lifecycle_1.0.3
## [45] scatterplot3d_0.3-44 multcomp_1.4-25
## [47] yaml_2.3.7 carData_3.0-5
## [49] gplots_3.1.3 Rtsne_0.16
## [51] grid_4.3.1 promises_1.2.1
## [53] crayon_1.5.2 miniUI_0.1.1.1
## [55] beachmat_2.16.0 cowplot_1.1.1
## [57] pillar_1.9.0 ComplexHeatmap_2.16.0
## [59] rjson_0.2.21 boot_1.3-28.1
## [61] fda_6.1.4 corpcor_1.6.10
## [63] codetools_0.2-19 glue_1.6.2
## [65] pcaMethods_1.92.0 data.table_1.14.8
## [67] vcd_1.4-11 vctrs_0.6.3
## [69] png_0.1-8 gtable_0.3.4
## [71] cachem_1.0.8 ks_1.14.1
## [73] xfun_0.40 mime_0.12
## [75] S4Arrays_1.0.6 RcppEigen_0.3.3.9.3
## [77] fds_1.8 pracma_2.4.2
## [79] ConsensusClusterPlus_1.64.0 pcaPP_2.0-3
## [81] survival_3.5-5 iterators_1.0.14
## [83] cytolib_2.12.1 ellipsis_0.3.2
## [85] TH.data_1.1-2 nlme_3.1-162
## [87] xts_0.13.1 RcppAnnoy_0.0.21
## [89] bslib_0.5.1 irlba_2.3.5.1
## [91] vipor_0.4.5 KernSmooth_2.23-21
## [93] colorspace_2.1-0 nnet_7.3-19
## [95] mnormt_2.1.1 tidyselect_1.2.0
## [97] smoother_1.1 curl_5.1.0
## [99] compiler_4.3.1 graph_1.78.0
## [101] BiocNeighbors_1.18.0 DelayedArray_0.26.7
## [103] colourpicker_1.3.0 caTools_1.18.2
## [105] DEoptimR_1.1-2 lmtest_0.9-40
## [107] hexbin_1.28.3 RBGL_1.76.0
## [109] stringr_1.5.0 digest_0.6.33
## [111] rainbow_3.7 XVector_0.40.0
## [113] htmltools_0.5.6 pkgconfig_2.0.3
## [115] jpeg_0.1-10 umap_0.2.10.0
## [117] sparseMatrixStats_1.12.2 fastmap_1.1.1
## [119] htmlwidgets_1.6.2 rlang_1.1.1
## [121] GlobalOptions_0.1.2 ggthemes_5.0.0
## [123] shiny_1.7.5 DelayedMatrixStats_1.22.6
## [125] farver_2.1.1 jquerylib_0.1.4
## [127] zoo_1.8-12 jsonlite_1.8.7
## [129] BiocParallel_1.34.2 mclust_6.0.0
## [131] BiocSingular_1.16.0 RCurl_1.98-1.12
## [133] magrittr_2.0.3 GenomeInfoDbData_1.2.10
## [135] munsell_0.5.0 Rcpp_1.0.11
## [137] ggnewscale_0.4.9 viridis_0.6.4
## [139] stringi_1.7.12 zlibbioc_1.46.0
## [141] MASS_7.3-60 shinyFiles_0.9.3
## [143] parallel_4.3.1 deldir_1.0-9
## [145] circlize_0.4.15 ggpubr_0.6.0
## [147] ranger_0.16.0 ggsignif_0.6.4
## [149] RcppHNSW_0.5.0 ScaledMatrix_1.8.1
## [151] XML_3.99-0.14 drc_3.0-1
## [153] evaluate_0.22 latticeExtra_0.6-30
## [155] laeken_0.5.2 deSolve_1.38
## [157] httpuv_1.6.11 foreach_1.5.2
## [159] tweenr_2.0.2 VIM_6.2.2
## [161] RANN_2.6.1 openssl_2.1.1
## [163] purrr_1.0.2 polyclip_1.10-6
## [165] knn.covertree_1.0 clue_0.3-65
## [167] rsvd_1.0.5 xtable_1.8-4
## [169] broom_1.0.5 e1071_1.7-13
## [171] RSpectra_0.16-1 later_1.3.1
## [173] rstatix_0.7.2 viridisLite_0.4.2
## [175] IDPmisc_1.1.20 class_7.3-22
## [177] tibble_3.2.1 beeswarm_0.4.0
## [179] cluster_2.1.4 ggplot.multistats_1.0.0
## [181] hdrcde_3.4